#=================================================================================#
# Title: Determinants of Rejected Mail Ballots in Georgia’s 2018 General Election
# Authors: Enrijeta Shino, Mara Suttmann-Lea, and Daniel A. Smith
# Journal: Political Research Quarterly
# Year: 2021
#=================================================================================#

# The workspace is deliberately NOT cleared here. rm(list = ls()) would delete
# every global object of whatever session or driver script source()s this file,
# and it does not detach packages or reset options, so it gives no real
# guarantee of a clean state. Run this script in a fresh R session instead.


# Load libraries
library(foreign)
library(poliscidata)
library(data.table)
library(questionr)
library(descr)
library(ggplot2)
library(plyr)
library(ggrepel)
library(Hmisc)
library(dplyr)
library(tm)
library(stringr)
library(reshape2)
library(quanteda)
library(tidytext)
library(ggplot2)
library(tidytext)
library(data.table)
library(gutenbergr)
library(tidyr)
library(scales)
library(gridExtra)
library(sandwich)
library(wrapr)
library(lubridate)
library(SortedEffects)
library(lmtest)
library(multiwayvcov)
library(tidyverse)
library(tidyft)


# Restore the cleaned data saved in master_dataset.RData (the code below
# expects it to provide an object called master_dataset)
load("master_dataset.RData")


#====================#
# VBM voters dataset #
#====================#

# Work on a data.table version of the master file
master_dataset <- data.table(master_dataset)

# Keep only the rows flagged as VBM voters (VBM_voter == 1)
vbm_voters <- subset(master_dataset, VBM_voter == 1)

# Descriptives: frequency counts of the three rejection indicators
table(vbm_voters[["vbm_rejected"]])
table(vbm_voters[["late.rejected.vbm"]])
table(vbm_voters[["ontime.rejected.vbm"]])


#==================================#
# Late VBM rejected voters dataset #
#==================================#

# Cross-tabulations for the late-rejection indicator (late.rejected.vbm) among
# VBM voters. Each xtp() call below passes the vbm_voters data, the rejection
# indicator, and one covariate; the coding of each covariate is listed first.

# Race (race1)
# White = 0
# Black = 1
# Hispanic = 2
# Asian = 3
# Other race = 4
xtp(vbm_voters, late.rejected.vbm, race1)

# Age group (age2)
# Age 18-22 = 1
# 23-29 = 2
# 30-34 = 3
# Age 35-44 = 4
# Age 45-59 = 5
# Age 60+ = 6
xtp(vbm_voters, late.rejected.vbm, age2)

# Registration year (reg.year18)
# 1 = 2018
# 0 = before 2018
xtp(vbm_voters, late.rejected.vbm, reg.year18)

# Gender (GENDER; the recodes later in this script use the codes "M", "F", "O")
xtp(vbm_voters, late.rejected.vbm, GENDER)


#=====================================#
# On-Time VBM rejected voters dataset #
#=====================================#

# Cross-tabulations for the on-time-rejection indicator (ontime.rejected.vbm)
# among VBM voters. Each xtp() call below passes the vbm_voters data, the
# rejection indicator, and one covariate; covariate codings are listed first.

# Race (race1)
# White = 0
# Black = 1
# Hispanic = 2
# Asian = 3
# Other race = 4
xtp(vbm_voters, ontime.rejected.vbm, race1)

# Age group (age2)
# Age 18-22 = 1
# 23-29 = 2
# 30-34 = 3
# Age 35-44 = 4
# Age 45-59 = 5
# Age 60+ = 6
xtp(vbm_voters, ontime.rejected.vbm, age2)

# Registration year (reg.year18)
# 1 = 2018
# 0 = before 2018
xtp(vbm_voters, ontime.rejected.vbm, reg.year18)

# Gender (GENDER; the recodes later in this script use the codes "M", "F", "O")
xtp(vbm_voters, ontime.rejected.vbm, GENDER)


#=================================#
# All VBM rejected voters dataset #
#=================================#

# Cross-tabulations for the overall rejection indicator (vbm_rejected) among
# VBM voters. Each xtp() call below passes the vbm_voters data, the rejection
# indicator, and one covariate; covariate codings are listed first.

# Race (race1)
# White = 0
# Black = 1
# Hispanic = 2
# Asian = 3
# Other race = 4
xtp(vbm_voters, vbm_rejected, race1)

# Age group (age2)
# Age 18-22 = 1
# 23-29 = 2
# 30-34 = 3
# Age 35-44 = 4
# Age 45-59 = 5
# Age 60+ = 6
xtp(vbm_voters, vbm_rejected, age2)

# Registration year (reg.year18)
# 1 = 2018
# 0 = before 2018
xtp(vbm_voters, vbm_rejected, reg.year18)

# Gender (GENDER; the recodes later in this script use the codes "M", "F", "O")
xtp(vbm_voters, vbm_rejected, GENDER)


#=======================================#
# Table 2: Ordinary Least Square Models #
#=======================================#

# Right-hand side shared by the three Table 2 models: race, 2018 registration,
# age group, the two gender indicators, and county_name (which enters as a set
# of county indicators when it is a character/factor column)
rejection_formula <- vbm_rejected ~ factor(race1) + reg.year18 + factor(age2) +
  female + gender_other + county_name


# Late delivered VBM ballot rejections
# Sample: VBM voters whose late-rejection indicator is observed (0 or 1)
sample_late <- master_dataset[master_dataset$VBM_voter == 1 &
                                (master_dataset$late.rejected.vbm == 0 |
                                   master_dataset$late.rejected.vbm == 1), ]
myOutcome.late <- lm(rejection_formula, data = sample_late)
summary(myOutcome.late)

# Robust standard errors, clustered by county.
# NOTE: sandwich::vcovHC() has no `cluster` argument for lm fits; passing one is
# silently ignored and yields unclustered HC0 errors. vcovCL() is the
# cluster-robust estimator, so it is used here to match the stated intent.
coeftest(myOutcome.late,
         vcov = vcovCL(myOutcome.late, cluster = ~ county_name, type = "HC0"))


# On time delivered VBM ballot rejections
# Sample: VBM voters whose on-time-rejection indicator is observed (0 or 1)
sample_ontime <- master_dataset[master_dataset$VBM_voter == 1 &
                                  (master_dataset$ontime.rejected.vbm == 0 |
                                     master_dataset$ontime.rejected.vbm == 1), ]
myOutcome.ontime <- lm(rejection_formula, data = sample_ontime)
summary(myOutcome.ontime)

# Robust standard errors, clustered by county (see note above)
coeftest(myOutcome.ontime,
         vcov = vcovCL(myOutcome.ontime, cluster = ~ county_name, type = "HC0"))


# Estimate the outcome equation for rejected VBMs
# Sample: all VBM voters
sample_all <- master_dataset[master_dataset$VBM_voter == 1, ]
myOutcome.all <- lm(rejection_formula, data = sample_all)
summary(myOutcome.all)

# Robust standard errors, clustered by county (see note above)
coeftest(myOutcome.all,
         vcov = vcovCL(myOutcome.all, cluster = ~ county_name, type = "HC0"))


#==============#
# Model Output #
#==============#

library(lmtest)
library(sandwich)
library(texreg)
library(xtable)

# Models reported in the table, in column order
table_models <- list(myOutcome.late, myOutcome.ontime, myOutcome.all)

# Standard errors (column 2) and p-values (column 4) taken from each model's
# summary() coefficient matrix, one named vector per model
table_se <- lapply(table_models, function(m) summary(m)$coefficients[, 2])
table_pvalues <- lapply(table_models, function(m) summary(m)$coefficients[, 4])

# LaTeX regression table. The p-value override used to be nested inside the
# override.se list because of a misplaced parenthesis, and the column labels
# were passed through `model.names`, which texreg() does not recognise; both
# are now supplied through the documented arguments.
texreg(table_models,
       caption = "Ordinary Least Squares Models",
       digits = 3,
       dcolumn = FALSE,
       custom.model.names = c("myOutcome.late", "myOutcome.ontime", "myOutcome.all"),
       override.se = table_se,
       override.pvalues = table_pvalues)



#===================================#
# Subset to Only Include VBM Voters #
#===================================#

load("vbm_AorR.RData")

# Includes VBM voters and those with status_reason 1
vbm_voter <- subset(master_dataset, VBM_voter == 1 | status_reason == 1)
table(vbm_voter$ballot_status, useNA = "ifany")


# Keep VBM voters whose ballot was accepted ("A") or rejected ("R"); any other
# Ballot.Status value (e.g. C or S) is dropped
vbm_AorR <- subset(vbm_voter, Ballot.Status %in% c("A", "R"))
table(vbm_AorR$ballot_status, useNA = "ifany")


# Keep only the "MAILED" ballot style
vbm_AorR <- subset(vbm_AorR, Ballot.Style == "MAILED")
table(vbm_AorR$ballot_status, useNA = "ifany")

# Crosstab for ballot status and race
xtp(vbm_AorR, Ballot.Status, race1)


# % of mailed ballots by county
vbm_AorR <- data.table(vbm_AorR)

# Rows per county, rows per county x ballot status, and the status share (%)
vbm_AorR[, nr_vbm := .N, by = "county_code"]
vbm_AorR[, nr_vbm_AR := .N, by = c("county_code", "ballot_status")]
vbm_AorR[, prop_vbm_AR := 100 * (nr_vbm_AR / nr_vbm)]


# Rows per county x ballot status x race, as a share (%) of the county total
vbm_AorR[, nr_vbm_AR_race := .N, by = c("county_code", "ballot_status", "race1")]
vbm_AorR[, prop_vbm_AR_race := 100 * (nr_vbm_AR_race / nr_vbm)]


# Reformat registration date, separated by "-"
# vbm_AorR$reg.date <- ymd(vbm_AorR$REGISTRATION_DATE)
#
# # Split registration year, month, and day
# vbm_AorR = vbm_AorR %>%
#   separate(reg.date, sep="-", into = c("reg.year", "reg.month", "reg.day"))

table(vbm_AorR$reg.year)

# Registration-year indicator:
# 1 = registered in 2018
# 0 = registered before 2018
vbm_AorR[, reg.year18 := ifelse(reg.year == 2018, 1, 0)]
table(vbm_AorR$reg.year18)


# Youth indicator 1:
# 1 = age 18-22 (age2 == 1); 0 = older
vbm_AorR[, age1822 := ifelse(age2 == 1, 1, 0)]
table(vbm_AorR$age1822)


# Youth indicator 2:
# 1 = age 18-29 (age2 is 1 or 2); 0 = older
vbm_AorR[, age1829 := ifelse(age2 == 1 | age2 == 2, 1, 0)]
table(vbm_AorR$age1829)


# Gender indicators built from the GENDER codes "M", "F", and "O"
vbm_AorR[, `:=`(
  male = ifelse(GENDER == "M", 1, 0),
  female = ifelse(GENDER == "F", 1, 0),
  gender_other = ifelse(GENDER == "O", 1, 0)
)]


# % of mailed ballots by county, skipping rows with a missing county code
# (and, for the status counts, a missing ballot status)
vbm_AorR <- data.table(vbm_AorR)

vbm_AorR[!is.na(county_code), nr_vbm_voters := .N, by = "county_code"]
vbm_AorR[!(is.na(county_code) | is.na(ballot_status)),
         nr_vbm_AR := .N, by = c("county_code", "ballot_status")]

# Rejected share (%) of the county's mailed ballots, filled on rejected rows only
vbm_AorR[ballot_status == "R", prop_vbm_R := 100 * (nr_vbm_AR / nr_vbm_voters)]



#===========#
# Heat maps #
#===========#

library(mapproj)
library(ggmap)
library(ggplot2)
library(plotly)
library(data.table)
library(tidyr)
library(dplyr)
library(plyr)
library(ggrepel)


# County polygons for the USA
counties <- map_data("county")

# Georgia rows only: ga_df draws the base map, ga_county carries the merge key
ga_df <- subset(counties, region == "georgia")
ga_county <- ga_df

# Rename the sixth column so it can be joined on county_name
colnames(ga_county)[6] <- "county_name"

# Base map: gray counties with black outlines
ga_base <- ggplot(ga_df, aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "gray") +
  coord_fixed(1.3)

# Outline-only preview of the county boundaries
ga_base +
  theme_nothing() +
  geom_polygon(data = ga_county, fill = NA, color = "black") +
  geom_polygon(color = "black", fill = NA)  # get the state border back on top


# Subset rejected absentee ballots only
vbm_R <- vbm_AorR[ballot_status == "R"]

# County names are lower-cased to match the map data's naming
vbm_R$county_name <- tolower(vbm_R$county_name)

# Merge the map polygons with the county rejection rates.
# vbm_R has one row per rejected ballot, so joining it directly would repeat
# every polygon vertex once per rejected voter in that county (a many-to-many
# join). prop_vbm_AR is computed per county and ballot status, so one row per
# county carries the same information and draws the same map.
county_rates <- unique(vbm_R[, .(county_name, prop_vbm_AR)])
gacopa <- inner_join(ga_county, county_rates, by = "county_name")

# Theme overrides that remove axes, ticks, grid lines, and the panel border
# while keeping guides and legends (the reason theme_nothing() is not used here)
ditch_the_axes <- theme(
  axis.title = element_blank(),
  axis.text = element_blank(),
  axis.ticks = element_blank(),
  axis.line = element_blank(),
  panel.grid = element_blank(),
  panel.border = element_blank()
)
library(reshape2)

# Heat map: counties shaded from white (low) to black (high) by prop_vbm_AR,
# with a horizontal colour bar centred below the map
map1 <- ga_base +
  geom_polygon(data = gacopa, aes(fill = prop_vbm_AR)) +
  geom_polygon(colour = "black", fill = NA, size = 0.2) +
  theme_bw() +
  ditch_the_axes +
  labs(fill = "VBM Rejection Rate by County") +
  theme(legend.position = "bottom", legend.justification = "center") +
  scale_fill_continuous(
    low = "white",
    high = "black",
    guide = guide_colorbar(
      title.position = 'top',
      # some shifting around
      title.hjust = 0.5,
      label.hjust = 0.5,
      direction = "horizontal",
      barwidth = unit(50, units = "mm"),
      barheight = unit(2, units = "mm"),
      draw.ulim = FALSE))
map1


#========================#
# PLOTS, GRAPHS, FIGURES #
#========================#

# Alternative specification: a VBM voter is anyone with a "MAILED" or
# "ELECTRONIC" ballot style in the absentee file, regardless of whether they
# appear as "Y" in vote history

# Step 1: keep the MAILED / ELECTRONIC ballot styles
vbm_voter <- subset(master_dataset, Ballot.Style %in% c("MAILED", "ELECTRONIC"))

# Step 2: keep accepted ("A") or rejected ("R") ballots only, which drops the
# other Ballot.Status values (C or S)
vbm_voter_AorR <- subset(vbm_voter, Ballot.Status %in% c("A", "R"))
table(vbm_voter_AorR$Ballot.Status, useNA = "ifany")


# Identify cases that have the same ballot issue date and ballot return date.
# Blank date strings are first recoded to NA in EACH date column. (The return
# date recode previously overwrote Ballot.Issued.Date instead, which erased
# valid issue dates and left blank return dates untouched.)
vbm_voter$Ballot.Issued.Date[vbm_voter$Ballot.Issued.Date == ""] <- NA
vbm_voter$Ballot.Return.Date[vbm_voter$Ballot.Return.Date == ""] <- NA

# Flag VBMs returned on the day they were issued, which might indicate that
# they were not really "Mailed":
# 1 = same issue and return date; 0 = different dates; NA = a date is missing
vbm_voter$sameissued_returndate <- ifelse(
  as.character(vbm_voter$Ballot.Issued.Date) ==
    as.character(vbm_voter$Ballot.Return.Date),
  1, 0)

# Dummy out Black registrants (race1 == 1)
vbm_voter$black <- ifelse(vbm_voter$race1 == 1, 1, 0)
table(vbm_voter$black)

# Dummy out White registrants (race1 == 0)
vbm_voter$white <- ifelse(vbm_voter$race1 == 0, 1, 0)
table(vbm_voter$white)


# Recode Ballot.Status as character with missing values:
#   - blank strings (and the literal "999" placeholder) become NA
#   - statuses "C" and "S" are removed by setting them to NA
master_dataset$Ballot.Status <- local({
  status <- as.character(master_dataset$Ballot.Status)
  status[status == "" | status == "999"] <- NA
  status[status == "C"] <- NA
  status[status == "S"] <- NA
  status
})
table(master_dataset$Ballot.Status)

# VBM rejected: 1 = Ballot.Status "R", 0 = any other remaining status,
# NA = status missing
master_dataset$vbm_rejected <- ifelse(master_dataset$Ballot.Status == "R", 1, 0)
table(master_dataset$vbm_rejected, useNA = "ifany")


# PLOTS
# Normalise county names: all lower case, then capitalise the first letter
master_dataset$county_name <- capitalize(tolower(master_dataset$county_name))


# Number of registered voters per county
#master_dataset <- data.table(master_dataset_AorR)
master_dataset <- data.table(master_dataset)
master_dataset[, nr_voters_county := .N, by = "county_code"]
master_dataset[, .N, by = "county_name"]

# Quick checks of the county counts and of ballot status shares (row %) by county
table(master_dataset$nr_voters_county)
length(master_dataset$nr_voters_county)
100 * master_dataset[, prop.table(table(county_name, ballot_status), 1)]

### Number of registered voters by race and county (rows with missing race are
### not counted), and that count as a share (%) of the county total
master_dataset[!is.na(race), nr_race_county := .N, by = c("county_code", "race")]

master_dataset[, prop_race := 100 * (nr_race_county / nr_voters_county)]
ncol(master_dataset)


#===========================#
# Code for Plots and Graphs #
#===========================#

vbm_voter_AorR <- data.table(vbm_voter_AorR)

# Plot data for the Black/White comparison: keep race codes "WH" and "BH"
master_dataset_bw <- vbm_voter_AorR[race %in% c("WH", "BH")]

# x-axis: share (%) of the county's Black + White mailed ballots cast by each race
# N_bc/(N_bc+N_wc): % cast by Black voters in the county
# N_wc/(N_bc+N_wc): % cast by White voters in the county

master_dataset_bw <- data.table(master_dataset_bw)
master_dataset_bw[, nr_voters_BW := .N, by = "county_code"]
master_dataset_bw[, nr_voters_race := .N, by = c("county_code", "race")]
master_dataset_bw[race == "BH", prop_voters_B := 100 * (nr_voters_race / nr_voters_BW)]
master_dataset_bw[race == "WH", prop_voters_W := 100 * (nr_voters_race / nr_voters_BW)]


# y-axis: share (%) of each race's mailed ballots that were rejected.
# The rejected counts are only filled on rejected rows, so the rates below are
# NA on every other row.
# N_br/N_bc: % of mailed ballots by Black voters that are rejected
master_dataset_bw[race == "BH" & Ballot.Status == "R", nr_voters_B_R := .N, by = "county_code"]
master_dataset_bw[race == "BH", prop_voters_B_R := 100 * (nr_voters_B_R / nr_voters_race)]

# N_wr/N_wc: % of mailed ballots by White voters that are rejected
master_dataset_bw[race == "WH" & Ballot.Status == "R", nr_voters_W_R := .N, by = "county_code"]
master_dataset_bw[race == "WH", prop_voters_W_R := 100 * (nr_voters_W_R / nr_voters_race)]

table(master_dataset_bw$prop_voters_B_R)
table(master_dataset_bw$prop_voters_W_R)


#============================#
# PLOTS: Hispanic vs. Whites #
#============================#

# Plot data for the Hispanic/White comparison: keep race codes "WH" and "HP"
master_dataset_hw <- vbm_voter_AorR[race %in% c("WH", "HP")]
table(master_dataset_hw$race, useNA = "ifany")
xtp(master_dataset_hw, Ballot.Issued.Date, race)
# ballot issued includes a couple very early and a couple after the election, and 103 on Nov 6, 2018, Election Day.


# x-axis: share (%) of the county's Hispanic + White mailed ballots cast by each race
# N_hc/(N_hc+N_wc): % cast by Hispanic voters in the county
# N_wc/(N_hc+N_wc): % cast by White voters in the county

master_dataset_hw <- data.table(master_dataset_hw)
master_dataset_hw[, nr_voters_HW := .N, by = "county_code"]
master_dataset_hw[, nr_voters_race := .N, by = c("county_code", "race")]
master_dataset_hw[race == "HP", prop_voters_H := 100 * (nr_voters_race / nr_voters_HW)]
master_dataset_hw[race == "WH", prop_voters_W := 100 * (nr_voters_race / nr_voters_HW)]

# y-axis: share (%) of each race's mailed ballots that were rejected
# N_hr/N_hc: % of mailed ballots by Hispanic voters that are rejected
# (the count column keeps its nr_voters_B_R name but holds Hispanic counts here)
master_dataset_hw[race == "HP" & Ballot.Status == "R", nr_voters_B_R := .N, by = "county_code"]
master_dataset_hw[race == "HP", prop_voters_H_R := 100 * (nr_voters_B_R / nr_voters_race)]

# N_wr/N_wc: % of mailed ballots by White voters that are rejected
master_dataset_hw[race == "WH" & Ballot.Status == "R", nr_voters_W_R := .N, by = "county_code"]
master_dataset_hw[race == "WH", prop_voters_W_R := 100 * (nr_voters_W_R / nr_voters_race)]


#================================================================#
# Appendix: New 45 Degree Plot: % VBM Rejected Blacks vs. Whites #
#================================================================#

# County-level plotting table for the Black/White comparison
d1_wb <- master_dataset_bw[, c("prop_voters_B", "prop_voters_B_R", "prop_voters_W_R",
                               "nr_voters_BW", "county_name"), with = FALSE]

# Normalise county names: all lower case, then capitalise the first letter
d1_wb[, county_name := capitalize(tolower(county_name))]

# Data cleaning: within each county, spread the first non-missing value of each
# rate over all of that county's rows so that duplicates collapse below
first_non_missing <- function(v) v[!is.na(v)][1L]
rate_columns <- c("prop_voters_B_R", "prop_voters_W_R", "prop_voters_B")
d1_wb[, (rate_columns) := lapply(.SD, first_non_missing),
      by = county_name, .SDcols = rate_columns]

d1_wb <- unique(d1_wb)


# Hide all of the county names
d1_wb[, county_name1 := ""]

# Row indices of the counties whose names should be shown on the graph
labelled_counties <- c("Clay", "Warren", "Gwinnett", "Hancock", "Dekalb", "Glynn",
                       "Fulton", "Taylor", "Polk", "Cobb", "Pickens", "Wilcox",
                       "Candler", "Putnam", "Atkinson", "Mcintosh")
index <- unlist(lapply(labelled_counties,
                       function(nm) d1_wb[, .I[county_name == nm]]))

# Restore the label for the selected counties only
d1_wb[index, county_name1 := county_name]



# 45-degree plot of county rejection rates: White (x) against Black (y).
# Points are sized by nr_voters_BW; labels come from county_name1 and are only
# drawn where the White rejection rate exceeds 1.
ggplot(data = d1_wb, aes(x = prop_voters_W_R, y = prop_voters_B_R,
                         label = ifelse(prop_voters_W_R > 1, as.character(county_name1), ""))) +
  # size is mapped to the column by name; `d1_wb$...` inside aes() bypasses the
  # layer data and is discouraged by ggplot2
  geom_point(aes(size = nr_voters_BW), shape = 21, show.legend = FALSE) +
  coord_fixed() +
  scale_x_continuous(limits = c(0, 20), breaks = seq(from = 0, to = 20, by = 2)) +
  scale_y_continuous(limits = c(0, 20), breaks = seq(from = 0, to = 20, by = 2)) +
  xlab("White Rejection Rate") +
  ylab("Black Rejection Rate") +
  # 45-degree reference line: equal rejection rates for both groups
  geom_abline(intercept = 0, slope = 1, colour = "grey") +
  # annotate("text", x = Inf, y = 0, label = "",
  #          hjust = 1.1, vjust = -1.1, col = "black", cex = 2.2,
  #          fontface = "bold", alpha = 0.8) +
  # guide = "none" replaces the deprecated guide = FALSE
  scale_fill_manual(guide = "none", values = c(NA, "black")) +
  geom_text_repel(size = 3, force = 7, show.legend = FALSE, segment.colour = NA) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black", size = 0.3))


#==============================================================#
# Appendix: 45 Degree Plot % VBM Rejected Hispanics vs. Whites #
#==============================================================#

# County-level plotting table for the Hispanic/White comparison
d1_wh <- master_dataset_hw[, c("prop_voters_H", "prop_voters_H_R", "prop_voters_W_R",
                               "nr_voters_HW", "county_name"), with = FALSE]

# Normalise county names: all lower case, then capitalise the first letter
d1_wh[, county_name := capitalize(tolower(county_name))]

# Data cleaning: within each county, spread the first non-missing value of each
# rate over all of that county's rows so that duplicates collapse below
first_non_missing <- function(v) v[!is.na(v)][1L]
rate_columns <- c("prop_voters_H_R", "prop_voters_W_R", "prop_voters_H")
d1_wh[, (rate_columns) := lapply(.SD, first_non_missing),
      by = county_name, .SDcols = rate_columns]

d1_wh <- unique(d1_wh)


# Hide all of the county names
d1_wh[, county_name1 := ""]

# Row indices of the counties whose names should be shown on the graph
labelled_counties <- c("Clay", "Warren", "Gwinnett", "Hancock", "Dekalb", "Glynn",
                       "Fulton", "Taylor", "Polk", "Cobb", "Pickens", "Wilcox",
                       "Candler", "Putnam", "Atkinson", "Mcintosh")
index <- unlist(lapply(labelled_counties,
                       function(nm) d1_wh[, .I[county_name == nm]]))

# Restore the label for the selected counties only
d1_wh[index, county_name1 := county_name]


# 45-degree plot of county rejection rates: White (x) against Hispanic (y).
# Points are sized by nr_voters_HW; labels are drawn where the White rejection
# rate exceeds 1.
# NOTE(review): labels use county_name here, whereas the Black/White plot uses
# the filtered county_name1 column -- confirm which one is intended.
ggplot(data = d1_wh, aes(x = prop_voters_W_R, y = prop_voters_H_R,
                         label = ifelse(prop_voters_W_R > 1, as.character(county_name), ""))) +
  # Point size must come from this plot's own data. It previously referenced
  # d1_wb$nr_voters_HW, a column that the Black/White table does not have, so
  # the size mapping was silently lost.
  geom_point(aes(size = nr_voters_HW), shape = 21, show.legend = FALSE) +
  coord_fixed() +
  scale_x_continuous(limits = c(0, 20), breaks = seq(from = 0, to = 20, by = 2)) +
  scale_y_continuous(limits = c(0, 20), breaks = seq(from = 0, to = 20, by = 2)) +
  xlab("White Rejection Rate") +
  ylab("Hispanic Rejection Rate") +
  # 45-degree reference line: equal rejection rates for both groups
  geom_abline(intercept = 0, slope = 1, colour = "grey") +
  # annotate("text", x = Inf, y = 0, label = "",
  #          hjust = 1.1, vjust = -1.1, col = "black", cex = 2.2,
  #          fontface = "bold", alpha = 0.8) +
  # guide = "none" replaces the deprecated guide = FALSE
  scale_fill_manual(guide = "none", values = c(NA, "black")) +
  geom_text_repel(size = 2, force = 7, show.legend = FALSE, segment.colour = NA) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black", size = 0.3))



# ==============================================================#
# Figure 2: CDF Plot for VBM Rejections by Age Group and County #
# ==============================================================#

# Ballots per county x age group, per county x age group x ballot status, and
# the status share (%) within each county x age group cell
vbm_AorR[, nr_vbm_age := .N, by = .(county_code, age2)]
vbm_AorR[, nr_vbm_AR_age := .N, by = .(county_code, age2, ballot_status)]
vbm_AorR[, prop_vbm_AR_age := (nr_vbm_AR_age/nr_vbm_age)*100]


# Pull the rate, age group, and cell size from the rejected rows
rr <- vbm_AorR[vbm_rejected==1][,'prop_vbm_AR_age']
age_group <- vbm_AorR[vbm_rejected==1]$age2
nr <- vbm_AorR[vbm_rejected==1]$nr_vbm_age

# rr is a one-column table, so data.frame() keeps its own column name
# (prop_vbm_AR_age) rather than "rr"; the plot below relies on that name
dta_hist <- data.frame(rr = rr, age_group = age_group, nr = nr)
dta_hist$age_group <- as.factor(dta_hist$age_group)
dta_hist <- setDT(dta_hist)
dta_hist <- dta_hist[complete.cases(dta_hist),]
# Keep county x age-group cells with at least 100 ballots
dta_hist <- dta_hist[nr >= 100]

# Use ggplot to get the Cumulative Distribution Function

plot1 <- ggplot(dta_hist, aes(x = prop_vbm_AR_age, colour = age_group)) +
  stat_ecdf()

# Inverse Cumulative Distribution Function: re-plot the computed ECDF as 1 - y

pg_age <- ggplot_build(plot1)$data[[1]]
ggplot(pg_age, aes(x = x, y = 1-y, colour = as.factor(group))) + geom_step() +
  # Plain string for the axis title: it contains no LaTeX markup, and this
  # script never attaches latex2exp, so wrapping it in TeX() is not needed
  labs(x = 'Rejection Rates', y = 'Inverse Empirical CDF', fill='') +
  theme_bw()+
  theme(axis.line = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.direction='vertical',
        legend.position = c(0.98, 0.75),
        legend.justification = c(1, 0.5),
        legend.background = element_rect(colour = NA)) +
  scale_x_continuous(breaks=seq(0,40,2.5)) +
  scale_y_continuous(breaks=seq(0,1,0.10)) +
  scale_colour_hue(name='Age Group', labels=c('18-22','23-29', '30-34', '35-44', '45-59', '60+'))



# ======================================================================#
# Figure 2: CDF Plot for VBM Rejections by Registration Time and County #
# ======================================================================#

# Ballots per county x registration group, per county x registration group x
# ballot status, and the status share (%) within each county x registration cell
vbm_AorR[, nr_vbm_newlyreg := .N, by = .(county_code, reg.year18)]
vbm_AorR[, nr_vbm_AR_newlyreg := .N, by = .(county_code, reg.year18, ballot_status)]
vbm_AorR[, prop_vbm_AR_newlyreg := (nr_vbm_AR_newlyreg/nr_vbm_newlyreg)*100]


# Pull the rate, registration group, and cell size from the rejected rows
rr_reg18 <- vbm_AorR[vbm_rejected==1][,'prop_vbm_AR_newlyreg']
reg18 <- vbm_AorR[vbm_rejected==1]$reg.year18
nr_reg18 <- vbm_AorR[vbm_rejected==1]$nr_vbm_newlyreg

# rr_reg18 is a one-column table, so data.frame() keeps its own column name
# (prop_vbm_AR_newlyreg) rather than "rr"; the plot below relies on that name
dta_hist_reg18 <- data.frame(rr = rr_reg18, reg18 = reg18, nr = nr_reg18)
dta_hist_reg18$reg18 <- as.factor(dta_hist_reg18$reg18)
dta_hist_reg18 <- setDT(dta_hist_reg18)
dta_hist_reg18 <- dta_hist_reg18[complete.cases(dta_hist_reg18),]
# Keep county x registration-group cells with at least 100 ballots
dta_hist_reg18 <- dta_hist_reg18[nr >= 100]


# Use ggplot to get the Cumulative Distribution Function

plot2 <- ggplot(dta_hist_reg18, aes(x = prop_vbm_AR_newlyreg, colour = reg18)) +
  stat_ecdf()

# Inverse Cumulative Distribution Function: re-plot the computed ECDF as 1 - y

pg_reg18 <- ggplot_build(plot2)$data[[1]]
ggplot(pg_reg18, aes(x = x, y = 1-y, colour = as.factor(group))) + geom_step() +
  # Plain string for the axis title: it contains no LaTeX markup, and this
  # script never attaches latex2exp, so wrapping it in TeX() is not needed
  labs(x = 'Rejection Rates', y = 'Inverse Empirical CDF', fill='') +
  theme_bw()+
  theme(axis.line = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.direction='vertical',
        legend.position = c(0.98, 0.85),
        legend.justification = c(1, 0.5),
        legend.background = element_rect(colour = NA)) +
  scale_x_continuous(breaks=seq(0,14.5,1.5)) +
  scale_y_continuous(breaks=seq(0,1,0.10)) +
  scale_colour_hue(name='Registration Year', labels=c('Prior 2018','In 2018'))


#===============================================#
# Analyze reasons for rejected VBM ballots Only #
#===============================================#
# Columns needed to analyse the stated reasons for rejection
reason_columns <- c("late.return1", "Status.Reason", "vbm_rejected", "age2", "race1",
                    "female", "gender_other", "late.rejected.vbm",
                    "ontime.rejected.vbm", "reg.year18")
reasons <- master_dataset[, reason_columns, with = FALSE]

# Keep rejected ballots that have a recorded status reason
reasons <- reasons[vbm_rejected == 1 & !is.na(Status.Reason)]


# Working copy of the reason text with double-quote characters stripped
reasons$status_reason2 <- gsub('[\"]', '', reasons$Status.Reason)

# There are 168 unique reasons
length(unique(reasons$Status.Reason))


# Reason coding:
# 17 = multiple issues such as missing yob, sign, oath, address 
reasons$status_reason3 <- with(reasons, 
                               # Late return
                               ifelse(status_reason2 == "Ballot Not Returned By Election Day" | 
                                        status_reason2 == "Ballot Received after Deadline" | 
                                        status_reason2 == "DID NOT COME IN" |
                                        status_reason2 == "NEVER CAME IN" |
                                        status_reason2 == "OATH NOT COMPLETED BY DEADLINE" |
                                        status_reason2 == "OATH NOT SIGNED BY DEADLINE" |
                                        status_reason2 == "RECEIVED AFTER DEADLINE/INSUFFICIENT " |
                                        status_reason2 == "RECEIVED OATH SIGNATURE AFTER " |
                                        status_reason2 == "RNS BY 11/9" |
                                        status_reason2 == "TOO LATE" |
                                        status_reason2 == "RECEIVED AFTER DEADLINE/INSUFFICIENT" |
                                        status_reason2 == "BALLOT RECEIVED AFTER DEADLINE /TURNED IN AT ", 1, 
                                      
                                      # Ballot returned late and undelivered but rejected
                                      ifelse(status_reason2 == "Ballot Returned Undeliverable" |
                                               status_reason2 == "BALLOT NOT DELIVERED" |
                                               status_reason2 == "BALLOT NOT DELIVERED", 999,      
                                             
                                             # Signature missing
                                             ifelse(status_reason2 == "BALLOT NOT SIGNED" |
                                                      status_reason2 == "MISSING SIGNATURE ON OATH" |
                                                      status_reason2 == "NO SIGNATURE" |
                                                      status_reason2 == "NO SIGNATURE ON ENVELOPE" |
                                                      status_reason2 == "NO SIGNATURE ON OATH" |
                                                      status_reason2 == "NO SIGNATURE. DID CALL HER" |
                                                      status_reason2 == "NO SIGNATURE ON YELLOW ENV. NO PHONE # " |
                                                      status_reason2 == "NO SIGNATURE/OATH NOT COMPLETED" |
                                                      status_reason2 == "NO SIGNATURE/OATH NOT COMPLETED" |
                                                      status_reason2 == "OATH NOT SIGNED" |
                                                      status_reason2 == "RECEIVED OATH SIGNATURE AFTER" |
                                                      status_reason2 == "OATH NOT SIGNED BY VOTER" |
                                                      status_reason2 == "SENT SIG MISMATCH LETTER, DIDNT FOLLOW" |
                                                      status_reason2 == "SIG MISSING" |
                                                      status_reason2 == "SIGNATURE DOES NOT MATCH" |
                                                      status_reason2 == "Signature Match" |
                                                      status_reason2 == "SIGNATURE MISMATCH" |
                                                      status_reason2 == "SIGNATURE MISSING" |
                                                      status_reason2 == "SIGNATURE NOT ON BALLOT" |
                                                      status_reason2 == "SIGNATURE NOT RESOLVED" |
                                                      status_reason2 == "SIGNATURE/INFO NOT ON BALLOT" |
                                                      status_reason2 == "DID NOT SIGN OATH OF ELECTOR" |
                                                      status_reason2 == "SENT SIG MISMATCH LETTER, DIDNT FOLLOW " |
                                                      status_reason2 == "SIGNED WRONG LINE" |
                                                      status_reason2 == "THE VOTER DIDN'T SIGN THE OATH OF ELECTOR" |
                                                      status_reason2 == "UNSIGNED INCOMPLETE OATH OF ELECTOR" |
                                                      status_reason2 == "FAIL TO SIGN THE OATH OF ELECTOR" |
                                                      status_reason2 == "VOTER SIGNED ACTUAL BALLOT" |
                                                      status_reason2 == "MISSING SIGNATURE", 2, 
                                                    
                                                    # DOB        
                                                    ifelse(status_reason2 == "BIRTHDATE DOES NOT MATCH" | 
                                                             status_reason2 == "DATE OF BIRTH DOES NOT MATCH AND NO" |
                                                             status_reason2 == "BIRTHDATE DOES NOT MATCH" |
                                                             status_reason2 == "BIRTHDAY DIDN'T MATCH" |
                                                             status_reason2 == "DATE OF BIRTH DOES NOT MATCH VR CARD ON FILE" |
                                                             status_reason2 == "DOB DOESN'T MATCH" | 
                                                             status_reason2 == "DOB INCORRECT" |
                                                             status_reason2 == "NO DOB" |
                                                             status_reason2 == "DATE OF BIRTH DOES NOT MATCH AND NO " |
                                                             status_reason2 == "DOB IS NOT A MATCH" | 
                                                             status_reason2 == "DOB NOT A MATCH" |
                                                             status_reason2 == "CURRENT YEAR AS YEAR OF BIRTH" |
                                                             status_reason2 == "CURRENT YEAR AS YOB" |
                                                             status_reason2 == "INCORRECT BIRTH DATE IN OATH" |
                                                             status_reason2 == "INCORRECT DATE OF BIRTH IN OATH" |
                                                             status_reason2 == "INCORRECT DOB" |
                                                             status_reason2 == "INVALID BIRTHDATE" |
                                                             status_reason2 == "LEFT DATE OF BIRTH BLANK" |
                                                             status_reason2 == "LEFT DOB BLANK ON OATH" |
                                                             status_reason2 == "WRONG DATE OF BIRTH" |
                                                             status_reason2 == "WRONG DATE OF BIRTH ON OATH ENVELOPE" |
                                                             status_reason2 == "WRONG DOB" |
                                                             status_reason2 == "YEAR OF BIRTH MISSING" |
                                                             status_reason2 == "YOB NOT A MATCH"|
                                                             status_reason2 == "BIRTHDAY DIDN'T MATCH", 3, 
                                                           # Oath       
                                                           ifelse(status_reason2 == "BLANK OATH" | 
                                                                    status_reason2 == "BLANK OATH OF ELCECTOR" |
                                                                    status_reason2 == "BLANK OATH OF ELECTOR" |
                                                                    status_reason2 == "BLANK OATH" |
                                                                    status_reason2 == "BLANK OATH OF ELECTORS" |
                                                                    status_reason2 == "BLANK OATH OF ELECTOR" |
                                                                    status_reason2 == "BLANK OATHOF ELECTOR" |
                                                                    status_reason2 == "BLANK OATHOF ELECTORS" |
                                                                    status_reason2 == "INCOMPLETE OATH" |
                                                                    status_reason2 == "LEFT OATH BLANK" |
                                                                    status_reason2 == "NO OATH" |
                                                                    status_reason2 == "LEFT OATH BLANK BUT SIGNED IT" |
                                                                    status_reason2 == "FAIL TO SIGN THE OATH OF ELECTOR" |
                                                                    status_reason2 == "DID NOT RETURN IN OATH ENVELOPE" |
                                                                    status_reason2 == "INSUFFICIENT OATH INFORMATION AND OATH " |
                                                                    status_reason2 == "Insufficient Oath Information" |
                                                                    status_reason2 == "MISSING OATH" | 
                                                                    status_reason2 == "INSUFFICIENT OATH INFORMATION AND OATH" |
                                                                    status_reason2 == "OAT INC" |
                                                                    status_reason2 == "OATH BLANK" |
                                                                    status_reason2 == "OATH COMPLETELY BLANK" |
                                                                    status_reason2 == "OATH INC" |
                                                                    status_reason2 == "OATH INCOMPLETE" |
                                                                    status_reason2 == "OATH MISSING(BLANK)" |
                                                                    status_reason2 == "OATH NOT COMPLETED" |
                                                                    status_reason2 == "OATH RECEIVED BLANK" |
                                                                    status_reason2 == "OATH RETURNED BLANK" |
                                                                    status_reason2 == "WRONG INFO ON OATH" |
                                                                    status_reason2 == "WRONG OATH INFO" |
                                                                    status_reason2 == "DID NOT SIGN OATH OF ELECTOR", 4,
                                                                  # Ineligible       
                                                                  ifelse(status_reason2 == "Changed to Ineligible Status" |
                                                                           status_reason2 == "FWAB/SWAB Ineligible", 5,
                                                                         # Deceased      
                                                                         ifelse(status_reason2 == "DECEASED OCTOBER 21 2018" |
                                                                                  status_reason2 == "VOTER IS DECEASED AS OF THIS MORNING" |
                                                                                  status_reason2 == "REC'D 11/08/18 AS BEING DECEASED", 6,
                                                                                # Ballot missing
                                                                                ifelse(status_reason2 == "DID NOT SEND BALLOT BACK WITH ENVELOPE" |
                                                                                         status_reason2 == "NO BALLOT ENCLOSED IN ENVELOPE" |
                                                                                         status_reason2 == "BALLOT NOT ENCLOSED/INSTRUCTION" |
                                                                                         status_reason2 == "NO BALLOT IN ENVELOPE" |
                                                                                         status_reason2 == "NO BALLOT INSIDE ENVELOPE" |
                                                                                         status_reason2 == "NOT ENCLOSED PROPERLY" |
                                                                                         status_reason2 == "BALLOT NOT DELIVERED" |
                                                                                         status_reason2 == "VOTER DID NOT SEND BALLOT BACK IN OATH " |
                                                                                         status_reason2 == "WRONG BALLOT WAS RETURN FROM 7-24-18" |
                                                                                         status_reason2 == "MAILED IN WRITE-IN PAPER", 7, 
                                                                                       # Address
                                                                                       ifelse(status_reason2 == "DIFFERENT ADDR RECORDED" |
                                                                                                status_reason2 == "INCORRECT ADDRESS AND COUNTY IN OATH" |
                                                                                                status_reason2 == "INCORRECT ADDRESS ON OATH" |
                                                                                                status_reason2 == "NO ADDRESS" |
                                                                                                status_reason2 == "NO RESIDENTIAL ADDRESS" |
                                                                                                status_reason2 == "INCORRECT RESIDENTIAL ADDRESS" |
                                                                                                status_reason2 == "NON MATCHING ADDRESS ON OATH ENVELOPE" |
                                                                                                status_reason2 == "OATH DOES NOT REFLECT RESIDENCE ADDRESS" |
                                                                                                status_reason2 == "REGISTERED ADRRESS IS MISSING/INCORRECT" |
                                                                                                status_reason2 == "RES ADDR MISSING" |
                                                                                                status_reason2 == "RES ADDR NOT A MATCH" |
                                                                                                status_reason2 == "RESI ADDR DID NOT MATCH" |
                                                                                                status_reason2 == "RESI ADDR NOT A MATCH" |
                                                                                                status_reason2 == "STREET ADDRESS MISSING" |
                                                                                                status_reason2 == "WRONG ADDRESS" |
                                                                                                status_reason2 == "WRONG COUNTY LISTED IN OATH" |
                                                                                                status_reason2 == "WRONG RESI ADDR" |
                                                                                                status_reason2 == "INCORRECT ADDRESS", 8, 
                                                                                              
                                                                                              # Wrong person
                                                                                              ifelse(status_reason2 == "ENTERED THE WRONG PERSON AS ACCEPTED" |
                                                                                                       status_reason2 == "NO PROOF OF CITIZENSHIP", 9, 
                                                                                                     # Felon       
                                                                                                     ifelse(status_reason2 == "FELON" |
                                                                                                              status_reason2 == "CANCELLED FELON" |
                                                                                                              status_reason2 == "VOTER IN CAN FELON STATUS", 10, 
                                                                                                            # Missing ID       
                                                                                                            ifelse(status_reason2 == "ID NOT PROVIDED" |
                                                                                                                     status_reason2 == "NO ID PROVIDED" |
                                                                                                                     status_reason2 == "PROVISIONAL ID NOT PORVIDED" |
                                                                                                                     status_reason2 == "PROVISIONAL INFORMATION NOT " |
                                                                                                                     status_reason2 == "PROVISIONAL ID NOT PROVIDED" |
                                                                                                                     status_reason2 == "MIDR-No ID", 11, 
                                                                                                                   # Info not on file       
                                                                                                                   ifelse(status_reason2 == "INFO NOT ON FILE" |
                                                                                                                            status_reason2 == "NEVER VERIFIED" |
                                                                                                                            status_reason2 == "INFORMATION NOT ON FILE" |
                                                                                                                            status_reason2 == "NO SSN VERIFICATION", 12, 
                                                                                                                          # Invalid marking
                                                                                                                          ifelse(status_reason2 == "INVALID MARKING", 13, 
                                                                                                                                 # Non matching info
                                                                                                                                 ifelse(status_reason2 == "NON MATCHING INFORMATION" |
                                                                                                                                          status_reason2 == "WRONG INFORMATION", 14, 
                                                                                                                                        # Other issues       
                                                                                                                                        ifelse(status_reason2 == "RE" |
                                                                                                                                                 status_reason2 == "PER NOTIFICATION FROM SOS  10/19/2018" |
                                                                                                                                                 status_reason2 == "REFUSED/DECLINED" |
                                                                                                                                                 status_reason2 == "REGISTRAR ERROR" |
                                                                                                                                                 status_reason2 == "DDS NOT VERIFIED" |
                                                                                                                                                 status_reason2 == "2 BALLOTS ENCLOSED" |
                                                                                                                                                 status_reason2 == "BACK OF BALLOT LEFT BLANK" |
                                                                                                                                                 status_reason2 == "BACK OF YELLOW ENVELOPE NOT FILLED " |
                                                                                                                                                 status_reason2 == "BALLOT NO ENVELOPES", 15, 
                                                                                                                                               
                                                                                                                                               # Multiple issues 
                                                                                                                                               ifelse(status_reason2 == "CURRENT YEAR AS YEAR OF BIRTH, SIG MATCH" |
                                                                                                                                                        status_reason2 == "CURRENT YEAR AS YOB, RES ADDR NOT A MATCH" |
                                                                                                                                                        status_reason2 == "CURRENT YEAR AS YOB, SIG MISSING" |
                                                                                                                                                        status_reason2 == "MISSING YOB AND SIG, RES ADDR NOT A MATCH" |
                                                                                                                                                        status_reason2 == "NO SIG/ NO DOB" |
                                                                                                                                                        status_reason2 == "NO SIGNATURE OR MARK OF ELECTOR" |
                                                                                                                                                        status_reason2 == "OATH & SIGNATURE NO COMPLETED" |
                                                                                                                                                        status_reason2 == "RES ADDR AND SIG MISSING" |
                                                                                                                                                        status_reason2 == "RES ADDR AND YEAR OF BIRTH MISSING" |
                                                                                                                                                        status_reason2 == "RES ADR AND YOB MISSING" |
                                                                                                                                                        status_reason2 == "SIG AND YEAR OF BIRTH MISSING" |
                                                                                                                                                        status_reason2 == "SIG AND YOB NOT A MATCH" |
                                                                                                                                                        status_reason2 == "SIG NON MATCH, ADDITIONAL ID NOT " |
                                                                                                                                                        status_reason2 == "SIG NON MATCH, ADDITIONAL ID NOT " |
                                                                                                                                                        status_reason2 == "SIG NON MATCH; ADDITIONAL ID NOT " |
                                                                                                                                                        status_reason2 == "SIG NOT THERE; ADDITIONAL ID NOT " |
                                                                                                                                                        status_reason2 == "YEAR OF BIRTH AND SIG MISSING" |
                                                                                                                                                        status_reason2 == "YEAR OF BIRTH MISSING, SIG MISSING"|
                                                                                                                                                        status_reason2 == "ADDRESS AND YOB MISSING" |
                                                                                                                                                        status_reason2 == "RES ADDR, YOB MISSING" |
                                                                                                                                                        status_reason2 == "YOB AND RES ADDR MISSING" |
                                                                                                                                                        status_reason2 == "YOB AND SIG MISSING" |
                                                                                                                                                        status_reason2 == "NO OATH OR SIGNATURE" |
                                                                                                                                                        status_reason2 == "INCORRECT ADDRESS AND COUNTY IN OATH" |
                                                                                                                                                        status_reason2 == "YOB AND SIG NOT A MATCH", 16, 
                                                                                                                                                      
                                                                                                                                                      # Voted in person       
                                                                                                                                                      ifelse(status_reason2 == "VOIDED BALLOT" |
                                                                                                                                                               status_reason2 == "VOTED IN-PERSON" |
                                                                                                                                                               status_reason2 == "VOTED ON ELECTION DAY" | 
                                                                                                                                                               status_reason2 == "VOTING IN-PERSON", NA, 18 )))))))))))))))))))

# Frequency of every detailed rejection code, missing codes included.
table(reasons$status_reason3, useNA = "ifany")


# Collapse the detailed codes (status_reason3) into five groups:
# 1 = late; 2 = oath/signature issue; 3 = date of birth; 4 = address issue;
# 5 = other issues. Rows whose status_reason3 is missing stay missing.
# NOTE(review): code 999 is also sent to "address issue" -- confirm that the
# status_reason3 recode can actually produce 999.
reasons$status_reason7 <- local({
  detailed <- reasons[["status_reason3"]]
  # Everything starts as "other"; the codes that have a home are overwritten.
  grouped <- rep(5, length(detailed))
  grouped[which(detailed == 1)] <- 1
  grouped[which(detailed == 2 | detailed == 4)] <- 2
  grouped[which(detailed == 3)] <- 3
  grouped[which(detailed == 8 | detailed == 999)] <- 4
  # Keep missing codes missing instead of lumping them into "other".
  grouped[is.na(detailed)] <- NA
  grouped
})

# Counts per group, then split by the late / on-time rejection flags.
# NOTE(review): table() drops NA by default, so rows where a flag is still
# missing at this point are left out of the two cross-tabs.
table(reasons$status_reason7)
table(reasons$status_reason7, reasons$late.rejected.vbm)
table(reasons$status_reason7, reasons$ontime.rejected.vbm)

# Share of each group, rounded to three decimals.
round(prop.table(table(reasons$status_reason7)), 3)

# Late-rejection flag: 1 = late; 0 = on-time.
# Missing values of the flag are recoded to 0 (on-time).
reasons$late.rejected.vbm[is.na(reasons$late.rejected.vbm)] <- 0
table(reasons$late.rejected.vbm)


# Table 1: Raw numbers
# Nr VBM ballots cast, by race1, age2, female and reg.year18
table(master_dataset$VBM_voter, master_dataset$race1)
table(master_dataset$VBM_voter, master_dataset$age2)
table(master_dataset$VBM_voter, master_dataset$female)
table(master_dataset$VBM_voter, master_dataset$reg.year18)


# Nr VBM rejected ballots: late vs. on-time, as counts and as shares
table(reasons$late.rejected.vbm)
prop.table(table(reasons$late.rejected.vbm))


# Late vs. on-time rejections by race1, age2, female and reg.year18, with
# column proportions and no plot. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.
crosstab(reasons$late.rejected.vbm, reasons$race1,
         prop.c = TRUE, plot = FALSE)
crosstab(reasons$late.rejected.vbm, reasons$age2,
         prop.c = TRUE, plot = FALSE)
crosstab(reasons$late.rejected.vbm, reasons$female,
         prop.c = TRUE, plot = FALSE)
crosstab(reasons$late.rejected.vbm, reasons$reg.year18,
         prop.c = TRUE, plot = FALSE)

# Table 1: Envelope reasons for rejection
# Some ballots were received on time but still carry a "late" rejection reason
# (original note: 20 cases). To keep them out of the on-time breakdown, work on
# a copy of status_reason7 in which every "late" code (1) is set to NA, and
# restrict all tabulations to on-time rejections (late.rejected.vbm == 0).
reasons$status_reason_nolate <- reasons$status_reason7
reasons$status_reason_nolate[reasons$status_reason_nolate == 1] <- NA

# Counts and shares of the remaining reason groups among on-time rejections.
table(reasons$status_reason_nolate[reasons$late.rejected.vbm == 0])

prop.table(table(reasons$status_reason_nolate[reasons$late.rejected.vbm == 0]))


# Reason group by race1, age2, female and reg.year18 (on-time rejections only),
# with column proportions and no plot. TRUE/FALSE are spelled out because T
# and F are ordinary variables that can be reassigned.
crosstab(reasons$status_reason_nolate[reasons$late.rejected.vbm == 0],
         reasons$race1[reasons$late.rejected.vbm == 0],
         prop.c = TRUE, plot = FALSE)

crosstab(reasons$status_reason_nolate[reasons$late.rejected.vbm == 0],
         reasons$age2[reasons$late.rejected.vbm == 0],
         prop.c = TRUE, plot = FALSE)

crosstab(reasons$status_reason_nolate[reasons$late.rejected.vbm == 0],
         reasons$female[reasons$late.rejected.vbm == 0],
         prop.c = TRUE, plot = FALSE)

crosstab(reasons$status_reason_nolate[reasons$late.rejected.vbm == 0],
         reasons$reg.year18[reasons$late.rejected.vbm == 0],
         prop.c = TRUE, plot = FALSE)



# Combine reasons into 3 categories (input: status_reason7):
# 1 = late (status_reason7 == 1)
# 2 = oath/signature issue OR address issue (status_reason7 == 2 or 4)
# 3 = everything else (date of birth, other issues)
# Rows whose status_reason7 is missing stay missing.
# NOTE(review): this category was originally described as "signature + oath"
# only, yet status_reason7 == 4 is the address-issue group, so address
# rejections are counted in category 2 as well. The coding is left untouched
# so results do not change; confirm whether the "== 4" term is intended.
reasons$status_reason_3cat <- local({
  grouped7 <- reasons[["status_reason7"]]
  # Default to the residual category, then overwrite the two named ones.
  collapsed <- rep(3, length(grouped7))
  collapsed[which(grouped7 == 1)] <- 1
  collapsed[which(grouped7 == 2 | grouped7 == 4)] <- 2
  collapsed[is.na(grouped7)] <- NA
  collapsed
})

table(reasons$status_reason_3cat)



#=====================================#
# Analyze reasons for all VBM ballots #
#=====================================#
# Keep only the rows that have no missing value in any of the listed columns.
# (senate_district used to be listed twice; once is enough.)
master_dataset_rev <- master_dataset[complete.cases(
  late.return1, Status.Reason, vbm_rejected, age2, race1, female,
  gender_other, VBM_voter, reg.year18,
  education, percapita_income, population, congress_district,
  senate_district, house_district, county_name
)]

# write.csv(reasons, file = "reasons.csv")

# Working copy of the reason text with double-quote characters removed.
# fixed = TRUE makes the pattern a literal quote rather than a regex class.
master_dataset_rev$status_reason2 <- gsub(
  "\"", "", master_dataset_rev$Status.Reason, fixed = TRUE
)

# There are 235 unique reasons (counted on the raw Status.Reason column)
length(unique(master_dataset_rev$Status.Reason))


# Reason coding:
# 17 = multiple issues such as missing yob, sign, oath, address 
master_dataset_rev$status_reason3 <- with(master_dataset_rev, 
                                          # Late return
                                          ifelse(status_reason2 == "Ballot Not Returned By Election Day" | 
                                                   status_reason2 == "Ballot Received after Deadline" | 
                                                   status_reason2 == "DID NOT COME IN" |
                                                   status_reason2 == "NEVER CAME IN" |
                                                   status_reason2 == "OATH NOT COMPLETED BY DEADLINE" |
                                                   status_reason2 == "OATH NOT SIGNED BY DEADLINE" |
                                                   status_reason2 == "RECEIVED AFTER DEADLINE/INSUFFICIENT " |
                                                   status_reason2 == "RECEIVED OATH SIGNATURE AFTER " |
                                                   status_reason2 == "RNS BY 11/9" |
                                                   status_reason2 == "TOO LATE" |
                                                   status_reason2 == "RECEIVED AFTER DEADLINE/INSUFFICIENT" |
                                                   status_reason2 == "BALLOT RECEIVED AFTER DEADLINE /TURNED IN AT ", 1, 
                                                 # Signature missing
                                                 ifelse(status_reason2 == "BALLOT NOT SIGNED" |
                                                          status_reason2 == "MISSING SIGNATURE ON OATH" |
                                                          status_reason2 == "NO SIGNATURE" |
                                                          status_reason2 == "NO SIGNATURE ON ENVELOPE" |
                                                          status_reason2 == "NO SIGNATURE ON OATH" |
                                                          status_reason2 == "NO SIGNATURE. DID CALL HER" |
                                                          status_reason2 == "NO SIGNATURE ON YELLOW ENV. NO PHONE # " |
                                                          status_reason2 == "NO SIGNATURE/OATH NOT COMPLETED" |
                                                          status_reason2 == "NO SIGNATURE/OATH NOT COMPLETED" |
                                                          status_reason2 == "OATH NOT SIGNED" |
                                                          status_reason2 == "RECEIVED OATH SIGNATURE AFTER" |
                                                          status_reason2 == "OATH NOT SIGNED BY VOTER" |
                                                          status_reason2 == "SENT SIG MISMATCH LETTER, DIDNT FOLLOW" |
                                                          status_reason2 == "SIG MISSING" |
                                                          status_reason2 == "SIGNATURE DOES NOT MATCH" |
                                                          status_reason2 == "Signature Match" |
                                                          status_reason2 == "SIGNATURE MISMATCH" |
                                                          status_reason2 == "SIGNATURE MISSING" |
                                                          status_reason2 == "SIGNATURE NOT ON BALLOT" |
                                                          status_reason2 == "SIGNATURE NOT RESOLVED" |
                                                          status_reason2 == "SIGNATURE/INFO NOT ON BALLOT" |
                                                          status_reason2 == "DID NOT SIGN OATH OF ELECTOR" |
                                                          status_reason2 == "SENT SIG MISMATCH LETTER, DIDNT FOLLOW " |
                                                          status_reason2 == "SIGNED WRONG LINE" |
                                                          status_reason2 == "THE VOTER DIDN'T SIGN THE OATH OF ELECTOR" |
                                                          status_reason2 == "UNSIGNED INCOMPLETE OATH OF ELECTOR" |
                                                          status_reason2 == "FAIL TO SIGN THE OATH OF ELECTOR" |
                                                          status_reason2 == "VOTER SIGNED ACTUAL BALLOT" |
                                                          status_reason2 == "MISSING SIGNATURE", 2, 
                                                        
                                                        # DOB        
                                                        ifelse(status_reason2 == "BIRTHDATE DOES NOT MATCH" | 
                                                                 status_reason2 == "DATE OF BIRTH DOES NOT MATCH AND NO" |
                                                                 status_reason2 == "BIRTHDATE DOES NOT MATCH" |
                                                                 status_reason2 == "BIRTHDAY DIDN'T MATCH" |
                                                                 status_reason2 == "DATE OF BIRTH DOES NOT MATCH VR CARD ON FILE" |
                                                                 status_reason2 == "DOB DOESN'T MATCH" | 
                                                                 status_reason2 == "DOB INCORRECT" |
                                                                 status_reason2 == "NO DOB" |
                                                                 status_reason2 == "DATE OF BIRTH DOES NOT MATCH AND NO " |
                                                                 status_reason2 == "DOB IS NOT A MATCH" | 
                                                                 status_reason2 == "DOB NOT A MATCH" |
                                                                 status_reason2 == "CURRENT YEAR AS YEAR OF BIRTH" |
                                                                 status_reason2 == "CURRENT YEAR AS YOB" |
                                                                 status_reason2 == "INCORRECT BIRTH DATE IN OATH" |
                                                                 status_reason2 == "INCORRECT DATE OF BIRTH IN OATH" |
                                                                 status_reason2 == "INCORRECT DOB" |
                                                                 status_reason2 == "INVALID BIRTHDATE" |
                                                                 status_reason2 == "LEFT DATE OF BIRTH BLANK" |
                                                                 status_reason2 == "LEFT DOB BLANK ON OATH" |
                                                                 status_reason2 == "WRONG DATE OF BIRTH" |
                                                                 status_reason2 == "WRONG DATE OF BIRTH ON OATH ENVELOPE" |
                                                                 status_reason2 == "WRONG DOB" |
                                                                 status_reason2 == "YEAR OF BIRTH MISSING" |
                                                                 status_reason2 == "YOB NOT A MATCH"|
                                                                 status_reason2 == "BIRTHDAY DIDN'T MATCH", 3, 
                                                               # Oath       
                                                               ifelse(status_reason2 == "BLANK OATH" | 
                                                                        status_reason2 == "BLANK OATH OF ELCECTOR" |
                                                                        status_reason2 == "BLANK OATH OF ELECTOR" |
                                                                        status_reason2 == "BLANK OATH" |
                                                                        status_reason2 == "BLANK OATH OF ELECTORS" |
                                                                        status_reason2 == "BLANK OATH OF ELECTOR" |
                                                                        status_reason2 == "BLANK OATHOF ELECTOR" |
                                                                        status_reason2 == "BLANK OATHOF ELECTORS" |
                                                                        status_reason2 == "INCOMPLETE OATH" |
                                                                        status_reason2 == "LEFT OATH BLANK" |
                                                                        status_reason2 == "NO OATH" |
                                                                        status_reason2 == "LEFT OATH BLANK BUT SIGNED IT" |
                                                                        status_reason2 == "FAIL TO SIGN THE OATH OF ELECTOR" |
                                                                        status_reason2 == "DID NOT RETURN IN OATH ENVELOPE" |
                                                                        status_reason2 == "INSUFFICIENT OATH INFORMATION AND OATH " |
                                                                        status_reason2 == "Insufficient Oath Information" |
                                                                        status_reason2 == "MISSING OATH" | 
                                                                        status_reason2 == "INSUFFICIENT OATH INFORMATION AND OATH" |
                                                                        status_reason2 == "OAT INC" |
                                                                        status_reason2 == "OATH BLANK" |
                                                                        status_reason2 == "OATH COMPLETELY BLANK" |
                                                                        status_reason2 == "OATH INC" |
                                                                        status_reason2 == "OATH INCOMPLETE" |
                                                                        status_reason2 == "OATH MISSING(BLANK)" |
                                                                        status_reason2 == "OATH NOT COMPLETED" |
                                                                        status_reason2 == "OATH RECEIVED BLANK" |
                                                                        status_reason2 == "OATH RETURNED BLANK" |
                                                                        status_reason2 == "WRONG INFO ON OATH" |
                                                                        status_reason2 == "WRONG OATH INFO" |
                                                                        status_reason2 == "DID NOT SIGN OATH OF ELECTOR", 4,
                                                                      # Ineligible       
                                                                      ifelse(status_reason2 == "Changed to Ineligible Status" |
                                                                               status_reason2 == "FWAB/SWAB Ineligible", 5,
                                                                             # Deceased      
                                                                             ifelse(status_reason2 == "DECEASED OCTOBER 21 2018" |
                                                                                      status_reason2 == "VOTER IS DECEASED AS OF THIS MORNING" |
                                                                                      status_reason2 == "REC'D 11/08/18 AS BEING DECEASED", 6,
                                                                                    # Ballot missing
                                                                                    ifelse(status_reason2 == "DID NOT SEND BALLOT BACK WITH ENVELOPE" |
                                                                                             status_reason2 == "NO BALLOT ENCLOSED IN ENVELOPE" |
                                                                                             status_reason2 == "BALLOT NOT ENCLOSED/INSTRUCTION" |
                                                                                             status_reason2 == "NO BALLOT IN ENVELOPE" |
                                                                                             status_reason2 == "NO BALLOT INSIDE ENVELOPE" |
                                                                                             status_reason2 == "NOT ENCLOSED PROPERLY" |
                                                                                             status_reason2 == "BALLOT NOT DELIVERED" |
                                                                                             status_reason2 == "VOTER DID NOT SEND BALLOT BACK IN OATH " |
                                                                                             status_reason2 == "WRONG BALLOT WAS RETURN FROM 7-24-18" |
                                                                                             status_reason2 == "WRONG BALLOT, OMITTED OATH" |   
                                                                                             status_reason2 == "MAILED IN WRITE-IN PAPER", 7, 
                                                                                           # Address
                                                                                           ifelse(status_reason2 == "DIFFERENT ADDR RECORDED" |
                                                                                                    status_reason2 == "INCORRECT ADDRESS AND COUNTY IN OATH" |
                                                                                                    status_reason2 == "INCORRECT ADDRESS ON OATH" |
                                                                                                    status_reason2 == "NO ADDRESS" |
                                                                                                    status_reason2 == "NO RESIDENTIAL ADDRESS" |
                                                                                                    status_reason2 == "INCORRECT RESIDENTIAL ADDRESS" |
                                                                                                    status_reason2 == "NON MATCHING ADDRESS ON OATH ENVELOPE" |
                                                                                                    status_reason2 == "OATH DOES NOT REFLECT RESIDENCE ADDRESS" |
                                                                                                    status_reason2 == "REGISTERED ADRRESS IS MISSING/INCORRECT" |
                                                                                                    status_reason2 == "RES ADDR MISSING" |
                                                                                                    status_reason2 == "RES ADDR NOT A MATCH" |
                                                                                                    status_reason2 == "RESI ADDR DID NOT MATCH" |
                                                                                                    status_reason2 == "RESI ADDR NOT A MATCH" |
                                                                                                    status_reason2 == "STREET ADDRESS MISSING" |
                                                                                                    status_reason2 == "WRONG ADDRESS" |
                                                                                                    status_reason2 == "WRONG COUNTY LISTED IN OATH" |
                                                                                                    status_reason2 == "WRONG RESI ADDR" |
                                                                                                    status_reason2 == "INCORRECT ADDRESS", 8, 
                                                                                                  
                                                                                                  # Ballot returned undelivered
                                                                                                  ifelse(status_reason2 == "Ballot Returned Undeliverable" |
                                                                                                           status_reason2 == "BALLOT NOT DELIVERED" |
                                                                                                           status_reason2 == "BALLOT NOT DELIVERED", 999,
                                                                                                         
                                                                                                         # Wrong person
                                                                                                         ifelse(status_reason2 == "ENTERED THE WRONG PERSON AS ACCEPTED" |
                                                                                                                  status_reason2 == "NO PROOF OF CITIZENSHIP", 9, 
                                                                                                                
                                                                                                                # Felon       
                                                                                                                ifelse(status_reason2 == "FELON" |
                                                                                                                         status_reason2 == "CANCELLED FELON" |
                                                                                                                         status_reason2 == "VOTER IN CAN FELON STATUS", 10, 
                                                                                                                       
                                                                                                                       # Missing ID       
                                                                                                                       ifelse(status_reason2 == "ID NOT PROVIDED" |
                                                                                                                                status_reason2 == "NO ID PROVIDED" |
                                                                                                                                status_reason2 == "PROVISIONAL ID NOT PORVIDED" |
                                                                                                                                status_reason2 == "PROVISIONAL INFORMATION NOT " |
                                                                                                                                status_reason2 == "PROVISIONAL ID NOT PROVIDED" |
                                                                                                                                status_reason2 == "MIDR-No ID", 11, 
                                                                                                                              
                                                                                                                              # Info not on file       
                                                                                                                              ifelse(status_reason2 == "INFO NOT ON FILE" |
                                                                                                                                       status_reason2 == "NEVER VERIFIED" |
                                                                                                                                       status_reason2 == "INFORMATION NOT ON FILE" |
                                                                                                                                       status_reason2 == "NO SSN VERIFICATION", 12, 
                                                                                                                                     
                                                                                                                                     # Invalid marking
                                                                                                                                     ifelse(status_reason2 == "INVALID MARKING", 13, 
                                                                                                                                            
                                                                                                                                            # Non-matching info
                                                                                                                                            ifelse(status_reason2 == "NON MATCHING INFORMATION" |
                                                                                                                                                     status_reason2 == "WRONG INFORMATION", 14, 
                                                                                                                                                   
                                                                                                                                                   # Other issues       
                                                                                                                                                   ifelse(status_reason2 == "RE" |
                                                                                                                                                            status_reason2 == "PER NOTIFICATION FROM SOS  10/19/2018" |
                                                                                                                                                            status_reason2 == "REFUSED/DECLINED" |
                                                                                                                                                            status_reason2 == "REGISTRAR ERROR" |
                                                                                                                                                            status_reason2 == "DDS NOT VERIFIED" |
                                                                                                                                                            status_reason2 == "2 BALLOTS ENCLOSED" |
                                                                                                                                                            status_reason2 == "BACK OF BALLOT LEFT BLANK" |
                                                                                                                                                            status_reason2 == "MISSING REQUIRED POSTMARK" |   
                                                                                                                                                            status_reason2 == "BACK OF YELLOW ENVELOPE NOT FILLED " |
                                                                                                                                                            status_reason2 == "BALLOT NO ENVELOPES", 15, 
                                                                                                                                                          # Multiple issues 
                                                                                                                                                          ifelse(status_reason2 == "CURRENT YEAR AS YEAR OF BIRTH, SIG MATCH" |
                                                                                                                                                                   status_reason2 == "CURRENT YEAR AS YOB, RES ADDR NOT A MATCH" |
                                                                                                                                                                   status_reason2 == "CURRENT YEAR AS YOB, SIG MISSING" |
                                                                                                                                                                   status_reason2 == "MISSING YOB AND SIG, RES ADDR NOT A MATCH" |
                                                                                                                                                                   status_reason2 == "NO SIG/ NO DOB" |
                                                                                                                                                                   status_reason2 == "NO SIGNATURE OR MARK OF ELECTOR" |
                                                                                                                                                                   status_reason2 == "OATH & SIGNATURE NO COMPLETED" |
                                                                                                                                                                   status_reason2 == "RES ADDR AND SIG MISSING" |
                                                                                                                                                                   status_reason2 == "RES ADDR AND YEAR OF BIRTH MISSING" |
                                                                                                                                                                   status_reason2 == "RES ADR AND YOB MISSING" |
                                                                                                                                                                   status_reason2 == "SIG AND YEAR OF BIRTH MISSING" |
                                                                                                                                                                   status_reason2 == "SIG AND YOB NOT A MATCH" |
                                                                                                                                                                   status_reason2 == "SIG NON MATCH, ADDITIONAL ID NOT " |
                                                                                                                                                                   status_reason2 == "SIG NON MATCH, ADDITIONAL ID NOT " |
                                                                                                                                                                   status_reason2 == "SIG NON MATCH; ADDITIONAL ID NOT " |
                                                                                                                                                                   status_reason2 == "SIG NOT THERE; ADDITIONAL ID NOT " |
                                                                                                                                                                   status_reason2 == "YEAR OF BIRTH AND SIG MISSING" |
                                                                                                                                                                   status_reason2 == "YEAR OF BIRTH MISSING, SIG MISSING"|
                                                                                                                                                                   status_reason2 == "ADDRESS AND YOB MISSING" |
                                                                                                                                                                   status_reason2 == "RES ADDR, YOB MISSING" |
                                                                                                                                                                   status_reason2 == "YOB AND RES ADDR MISSING" |
                                                                                                                                                                   status_reason2 == "YOB AND SIG MISSING" |
                                                                                                                                                                   status_reason2 == "NO OATH OR SIGNATURE" |
                                                                                                                                                                   status_reason2 == "INCORRECT ADDRESS AND COUNTY IN OATH" |
                                                                                                                                                                   status_reason2 == "PROV. BALLOT DUE TO SIG. NONMATCH - NO ID" |  
                                                                                                                                                                   status_reason2 == "YOB AND SIG NOT A MATCH", 16, 
                                                                                                                                                                 # Voted in person       
                                                                                                                                                                 ifelse(status_reason2 == "VOIDED BALLOT" |
                                                                                                                                                                          status_reason2 == "VOTED IN-PERSON" |
                                                                                                                                                                          status_reason2 == "VOTED ON ELECTION DAY" | 
                                                                                                                                                                          status_reason2 == "VOTING IN-PERSON", NA, 0 )))))))))))))))))))

# Distribution of the detailed rejection-reason codes, missing values included.
table(master_dataset_rev$status_reason3, useNA = "ifany")


# Code 999 (ballot not delivered) is recoded to NA. which() keeps only the
# positions that actually equal 999, so rows that are already NA are untouched.
master_dataset_rev$status_reason3 <- with(
  master_dataset_rev,
  replace(status_reason3, which(status_reason3 == 999), NA)
)
table(master_dataset_rev$status_reason3, useNA = "ifany")

# Collapse the detailed reason codes (status_reason3) into fewer categories:
#   0 = accepted VBM
#   Rejected VBM:
#   1 = late; 2 = signature issue; 3 = date of birth; 4 = oath issue;
#   5 = address issue (detailed code 8);
#   6 = multiple issues (detailed code 16);
#   7 = other issues (every remaining non-missing detailed code)
# A missing detailed code stays missing.
master_dataset_rev$status_reason7 <- with(master_dataset_rev, {
  detailed_codes <- c(0, 1, 2, 3, 4, 8, 16)
  collapsed_codes <- c(0, 1, 2, 3, 4, 5, 6)

  # Look up each detailed code; codes without an entry come back as NA.
  collapsed <- collapsed_codes[match(status_reason3, detailed_codes)]

  # Unmatched but observed codes fall into the residual "other issues" group.
  collapsed[is.na(collapsed) & !is.na(status_reason3)] <- 7
  collapsed
})

table(master_dataset_rev$status_reason7)


# Binary outcome indicators derived from the collapsed reason code
# (status_reason7). A missing reason code yields a missing indicator.

# Any rejection: 0 = accepted VBM; 1 = rejected VBM (any non-zero category)
master_dataset_rev$vbm.rejected.reason <- with(
  master_dataset_rev,
  as.numeric(status_reason7 != 0)
)

table(master_dataset_rev$vbm.rejected.reason)

# Late rejection: 0 = accepted VBM; 1 = late rejected VBM.
# Every other category has no entry in the lookup and becomes NA.
master_dataset_rev$late.rejected.reason <- with(
  master_dataset_rev,
  c(0, 1)[match(status_reason7, c(0, 1))]
)

table(master_dataset_rev$late.rejected.reason)

# On-time rejection: 0 = accepted VBM; 1 = on-time rejected VBM
# (categories 2 and above). Late rejections (category 1) are left as NA.
master_dataset_rev$ontime.rejected.reason <- with(master_dataset_rev, {
  flag <- rep(NA_real_, length(status_reason7))
  flag[which(status_reason7 == 0)] <- 0
  flag[which(status_reason7 >= 2)] <- 1
  flag
})

table(master_dataset_rev$ontime.rejected.reason)

# Share of observations in each collapsed reason category (status_reason7);
# table() drops missing values by default. This statement was previously
# repeated verbatim on two consecutive lines; it is now run once.
round(prop.table(table(master_dataset_rev$status_reason7)), digits = 3)

# Reason category by female. No margin is supplied, so each cell is a share of
# the whole table rather than of a row or column.
round(
  prop.table(
    table(master_dataset_rev$status_reason7, master_dataset_rev$female)
  ),
  digits = 3
)


### show these
# Reason category by race1, row proportions (margin = 1): within each reason
# category, the share belonging to each race1 group.
round(
  prop.table(
    table(master_dataset_rev$status_reason7, master_dataset_rev$race1),
    margin = 1
  ),
  digits = 3
)

# Original dataset
# Column proportions (margin = 2): within each race1 group, the share of
# observations at each value of the rejection outcome.
round(
  prop.table(
    table(master_dataset$late.rejected.vbm, master_dataset$race1),
    margin = 2
  ),
  digits = 3
)
round(
  prop.table(
    table(master_dataset$ontime.rejected.vbm, master_dataset$race1),
    margin = 2
  ),
  digits = 3
)
round(
  prop.table(
    table(master_dataset$vbm_rejected, master_dataset$race1),
    margin = 2
  ),
  digits = 3
)
# Raw counts of the overall rejection outcome.
table(master_dataset$vbm_rejected)

# New revised dataset with reasons
# Column proportions (margin = 2): within each race1 group, the share of
# observations at each value of the reason-based rejection indicator.
round(
  prop.table(
    table(master_dataset_rev$late.rejected.reason, master_dataset_rev$race1),
    margin = 2
  ),
  digits = 3
)
round(
  prop.table(
    table(master_dataset_rev$ontime.rejected.reason, master_dataset_rev$race1),
    margin = 2
  ),
  digits = 3
)
round(
  prop.table(
    table(master_dataset_rev$vbm.rejected.reason, master_dataset_rev$race1),
    margin = 2
  ),
  digits = 3
)
# Raw counts of the reason-based overall rejection indicator.
table(master_dataset_rev$vbm.rejected.reason)

# New revised dataset with reasons for age
# Same column-proportion tables, by age2 group instead of race1.
round(
  prop.table(
    table(master_dataset_rev$late.rejected.reason, master_dataset_rev$age2),
    margin = 2
  ),
  digits = 3
)
round(
  prop.table(
    table(master_dataset_rev$ontime.rejected.reason, master_dataset_rev$age2),
    margin = 2
  ),
  digits = 3
)
round(
  prop.table(
    table(master_dataset_rev$vbm.rejected.reason, master_dataset_rev$age2),
    margin = 2
  ),
  digits = 3
)

# Row proportions (margin = 1): within accepted (0) and rejected (1) ballots,
# the share falling under each detailed reason code (status_reason3).
# This statement was previously repeated verbatim; it is now run once.
round(
  prop.table(
    table(
      master_dataset_rev$vbm.rejected.reason,
      master_dataset_rev$status_reason3
    ),
    margin = 1
  ),
  digits = 3
)





